Run scripts and load objects
source("~scripts/00 - Admin.R")
source("~scripts/01 - Utility Functions.R")
source("~scripts/10 - Read Siegel Data.R")
guns_df <- readRDS("~outputs/10/11_guns_df.rds")
guns_clean <- readRDS("~outputs/20/21_guns_clean.rds")
guns_list <- readRDS("~outputs/20/21_guns_list.rds")
siegelSum <- readRDS("~outputs/20/20_siegelSum.RDS")
guns_list_shp <- readRDS("~outputs/20/21_guns_list_shp.rds")
guns_list_shp_byYear <- readRDS("~outputs/20/21_guns_list_shp_byYear.rds")
source("~scripts/31 - Explore crime data.R")
tracts_crimeCounts <- readRDS("~outputs/30/33_tracts_crimeCounts.rds")
BGs_crimeCounts <- readRDS("~outputs/30/33_BGs_crimeCounts.rds")
BG_selection_list <- readRDS("~outputs/20/22_BG_selection_list.rds")
tract_selection_list <- readRDS("~outputs/20/22_tract_selection_list.rds")
# source("~scripts/10 - Read Siegel Data.R")
# source("~scripts/11 - Read crime data.R")
# guns_df <- readRDS("~outputs/10/11_guns_df.rds")
# source("~scripts/21 - Clean crime data.R")
# guns_clean <- readRDS("~outputs/20/21_guns_clean.rds")
# guns_list <- readRDS("~outputs/20/21_guns_list.rds")
Sample of data for testing
set.seed(1)
guns_sample_df <- sample_n(guns_df, 10000)
guns_sample_ls <- guns_sample_df %>%
split(., # split into nested list of dfs by city
f = .$city)
# source("~scripts/12 - Read census data.R")
Date range: 1991-2019
range(siegel_raw$year)
## [1] 1991 2019
134 different laws…
unique(siegel_raw$law)
## [1] age18longgunpossess age18longgunsale
## [3] age21handgunpossess age21handgunsale
## [5] age21longgunpossess age21longgunsale
## [7] age21longgunsaled alcoholism
## [9] alctreatment amm18
## [11] amm21h ammbackground
## [13] ammlicense ammpermit
## [15] ammrecords ammrestrict
## [17] assault assaultlist
## [19] assaultregister assaulttransfer
## [21] backgroundpurge cap14
## [23] cap16 cap18
## [25] capaccess capliability
## [27] capunloaded capuses
## [29] ccbackground ccbackgroundnics
## [31] ccrenewbackground ccrevoke
## [33] college collegeconcealed
## [35] danger dealer
## [37] dealerh defactoreg
## [39] defactoregh drugmisdemeanor
## [41] dvro dvrodating
## [43] dvroremoval dvrosurrender
## [45] dvrosurrenderdating dvrosurrendernoconditions
## [47] elementary exparte
## [49] expartedating expartesurrender
## [51] expartesurrenderdating expartesurrendernoconditions
## [53] felony fingerprint
## [55] gunshow gunshowh
## [57] gvro gvrolawenforcement
## [59] immunity incidentall
## [61] incidentremoval inspection
## [63] invcommitment invoutpatient
## [65] junkgun liability
## [67] lockd locked
## [69] lockp lockstandards
## [71] loststolen magazine
## [73] magazinepreowned mayissue
## [75] mcdv mcdvdating
## [77] mcdvremovalallowed mcdvremovalrequired
## [79] mcdvsurrender mcdvsurrenderdating
## [81] mcdvsurrendernoconditions mentalhealth
## [83] microstamp nosyg
## [85] onefeature onepermonth
## [87] opencarryh opencarryl
## [89] opencarrypermith opencarrypermitl
## [91] permit permitconcealed
## [93] permith permitlaw
## [95] personalized preemption
## [97] preemptionbroad preemptionnarrow
## [99] purge recordsall
## [101] recordsallh recordsdealer
## [103] recordsdealerh registration
## [105] registrationh relinquishment
## [107] reportall reportallh
## [109] reportdealer reportdealerh
## [111] residential security
## [113] showing stalking
## [115] statechecks statechecksh
## [117] strawpurchase strawpurchaseh
## [119] tenroundlimit theft
## [121] threedaylimit traffickingbackground
## [123] traffickingprohibited traffickingprohibitedh
## [125] training universal
## [127] universalh universalpermit
## [129] universalpermith violent
## [131] violenth violentpartial
## [133] waiting waitingh
## 134 Levels: age18longgunpossess age18longgunsale ... waitingh
Organized into 14 categories…
unique(siegel_raw$Category)
## [1] possession.regulations
## [2] buyer.regulations
## [3] prohibitions.for.high-risk.gun.possession
## [4] ammunition.regulations
## [5] assault.weapons.and.large-capacity.magazines
## [6] background.checks
## [7] child.access.prevention
## [8] concealed.carry.permitting
## [9] dealer.regulations
## [10] domestic.violence
## [11] immunity
## [12] gun.trafficking
## [13] stand.your.ground
## [14] preemption
## 14 Levels: ammunition.regulations ...
And 50 sub-categories
unique(siegel_raw$Sub.Category)
## [1] Age restrictions
## [2] Alcohol
## [3] Background checks
## [4] Licensing
## [5] Permitting
## [6] Recordkeeping
## [7] Prohibitors
## [8] Assault weapons ban
## [9] Background check records
## [10] Storage
## [11] Campus carry
## [12] Mental Health
## [13] Registration
## [14] Drugs
## [15] Restraining order
## [16] School zones
## [17] Felony
## [18] Fingerprinting
## [19] Gun shows
## [20] Gun violence restraining orders
## [21] Immunity
## [22] Firearm removal
## [23] Inspections
## [24] Junk guns
## [25] Liability
## [26] Safety locks
## [27] Theft reporting
## [28] Large capacity magazine ban
## [29] Misdemeanor crimes
## [30] Background checks - mental health records
## [31] Crime gun identification
## [32] Stand your ground
## [33] Bulk purchase limit
## [34] Open carry
## [35] Personalized gun technology
## [36] Preemption
## [37] Reporting
## [38] Relinquishment of weapons
## [39] Location
## [40] Security
## [41] Stalking
## [42] Background checks - state records
## [43] Straw purchase
## [44] Background checks time limit
## [45] Gun trafficking
## [46] Safety training
## [47] Universal background checks
## [48] Background checks through permits
## [49] Violent Misdemeanor
## [50] Waiting period
## 50 Levels: Age restrictions Alcohol ... Waiting period
# source("~scripts/20 - Clean Siegel data.R")
# siegelSum <- readRDS("~outputs/20/20_siegelSum.RDS")
ggplot(siegelSum,
aes(x = year,
y = score)) +
geom_line(size = 1) +
# geom_point() +
facet_wrap(~ state, ncol = 5, scales = "free_x") +
plotTheme() +
scale_x_continuous(breaks = seq(min(siegelSum$year), max(siegelSum$year), 5)) +
labs(title = "Siegel Scores",
x = "Year",
y = "Score (sum of gun laws)") +
theme(panel.spacing.x = unit(8, "mm"))
Number of Cities: 34 Number of States: 29
unique(guns_clean$city)
## [1] "Atlanta" "Auburn" "Baltimore"
## [4] "Baton Rouge" "Boston" "Chicago"
## [7] "Cincinnati" "Columbia" "Dallas"
## [10] "Denver" "Detroit" "Gainesville"
## [13] "Hartford" "Indianapolis" "Kansas City"
## [16] "Lincoln" "Little Rock" "Los Angeles"
## [19] "Louisville" "Madison" "Minneapolis"
## [22] "Nashville" "New York" "Phoenix"
## [25] "Portland" "Raleigh" "Sacramento County"
## [28] "Saint Paul" "Salt Lake City" "San Francisco"
## [31] "St Louis County" "Tucson" "Virginia Beach"
unique(guns_clean$state)
## [1] "Georgia" "Washington" "Maryland" "Louisiana"
## [5] "Massachusetts" "Illinois" "Ohio" "South Carolina"
## [9] "Texas" "Colorado" "Michigan" "Florida"
## [13] "Connecticut" "Indiana" "Missouri" "Nebraska"
## [17] "Arkansas" "California" "Kentucky" "Wisconsin"
## [21] "Minnesota" "Tennessee" "New York" "Arizona"
## [25] "Oregon" "North Carolina" "Utah" "Virginia"
Date Range:
plan(multiprocess)
guns_sample_ls <- future_map(guns_sample_ls,
~ .x %>%
mutate(clean_occur_date = anydate(occurdate), # use built-in formats from anytime package
# correct some incorrectly parsed observations
clean_occur_date = case_when(occurdate == "1" ~ as.Date(NA),
clean_occur_date < as.Date("1900-01-01") ~ as.Date(NA),
is.na(clean_occur_date) &
str_detect(occurdate,
".*\\d+/\\d+/\\d+.*") ~ # e.g "12/3/15", "12/3/15 1600"
as.Date(occurdate, "%m/%d/%y"),
TRUE ~ clean_occur_date),
clean_report_date = anydate(reportdate),
clean_report_date = case_when(reportdate == "1" ~ as.Date(NA),
clean_report_date < as.Date("1900-01-01") ~ as.Date(NA),
is.na(clean_report_date) &
str_detect(reportdate,
".*\\d+/\\d+/\\d+.*") ~ # e.g "12/3/15", "12/3/15 1600"
as.Date(reportdate, "%m/%d/%y"),
TRUE ~ clean_report_date)))
range(guns_clean$clean_occur_date, na.rm = TRUE)
## [1] "1922-06-19" "2020-05-01"
range(guns_clean$clean_report_date, na.rm = TRUE)
## [1] "1960-10-03" "2020-05-01"
Benchmarking results show furrr is fastest for manipulating the data.
benchmark_results <- benchmark("flat" = {
flat <- guns_sample_df %>%
mutate(clean_occur_date = anydate(occurdate), # use built-in formats from anytime package
# correct some incorrectly parsed observations
clean_occur_date = case_when(occurdate == "1" ~ as.Date(NA),
is.na(clean_occur_date) &
str_detect(occurdate,
".*\\d+/\\d+/\\d+.*") ~ # e.g "12/3/15", "12/3/15 1600"
as.Date(occurdate, "%m/%d/%y"),
TRUE ~ clean_occur_date),
clean_report_date = anydate(reportdate),
clean_report_date = case_when(is.na(clean_report_date) &
str_detect(reportdate,
".*\\d+/\\d+/\\d+.*") ~ # e.g "12/3/15", "12/3/15 1600"
as.Date(reportdate, "%m/%d/%y"),
TRUE ~ clean_report_date))}, # benchmarking shows the furrr approach is fastest (but still not very fast)
"purrr" = {map(guns_sample_ls,
~ .x %>%
mutate(clean_occur_date = anydate(occurdate), # use built-in formats from anytime package
# correct some incorrectly parsed observations
clean_occur_date = case_when(occurdate == "1" ~ as.Date(NA),
is.na(clean_occur_date) &
str_detect(occurdate,
".*\\d+/\\d+/\\d+.*") ~ # e.g "12/3/15", "12/3/15 1600"
as.Date(occurdate, "%m/%d/%y"),
TRUE ~ clean_occur_date),
clean_report_date = anydate(reportdate),
clean_report_date = case_when(is.na(clean_report_date) &
str_detect(reportdate,
".*\\d+/\\d+/\\d+.*") ~ # e.g "12/3/15", "12/3/15 1600"
as.Date(reportdate, "%m/%d/%y"),
TRUE ~ clean_report_date)))},
# plan(multiprocess)
"furrr" = {future_map(guns_sample_ls,
~ .x %>%
mutate(clean_occur_date = anydate(occurdate), # use built-in formats from anytime package
# correct some incorrectly parsed observations
clean_occur_date = case_when(occurdate == "1" ~ as.Date(NA),
is.na(clean_occur_date) &
str_detect(occurdate,
".*\\d+/\\d+/\\d+.*") ~ # e.g "12/3/15", "12/3/15 1600"
as.Date(occurdate, "%m/%d/%y"),
TRUE ~ clean_occur_date),
clean_report_date = anydate(reportdate),
clean_report_date = case_when(is.na(clean_report_date) &
str_detect(reportdate,
".*\\d+/\\d+/\\d+.*") ~ # e.g "12/3/15", "12/3/15 1600"
as.Date(reportdate, "%m/%d/%y"),
TRUE ~ clean_report_date)))},
replications = 1
)
Data:
# source("~scripts/21 - Clean crime data.R")
# guns_list_shp <- readRDS("~outputs/20/21_guns_list_shp.rds")
Source script:
# source("~scripts/31 - Explore crime data.R")
Crime counts by city
gunIncident_summary %>%
arrange(desc(prop)) %>%
kable() %>%
kable_styling()
| city | gun_count | all_crimes_count | prop |
|---|---|---|---|
| Madison | 3867 | 12991 | 0.2976676 |
| Los Angeles | 280066 | 2114091 | 0.1324758 |
| St Louis County | 6844 | 65111 | 0.1051128 |
| Baltimore | 30264 | 300336 | 0.1007671 |
| Atlanta | 32802 | 349277 | 0.0939140 |
| Nashville | 90126 | 1088679 | 0.0827847 |
| Chicago | 535385 | 7105053 | 0.0753527 |
| Baton Rouge | 26374 | 456033 | 0.0578335 |
| Indianapolis | 34232 | 750728 | 0.0455984 |
| Kansas City | 89619 | 2143395 | 0.0418117 |
| Hartford | 25551 | 668332 | 0.0382310 |
| Saint Paul | 9050 | 248359 | 0.0364392 |
| Sacramento County | 12962 | 362005 | 0.0358061 |
| Dallas | 38298 | 1089765 | 0.0351434 |
| Columbia | 793 | 25459 | 0.0311481 |
| Minneapolis | 6654 | 217760 | 0.0305566 |
| San Francisco | 71555 | 2546472 | 0.0280997 |
| Virginia Beach | 3053 | 124693 | 0.0244841 |
| New York | 162424 | 6847944 | 0.0237187 |
| Louisville | 29540 | 1330690 | 0.0221990 |
| Denver | 9819 | 563795 | 0.0174159 |
| Lincoln | 4192 | 241098 | 0.0173871 |
| Boston | 12769 | 746868 | 0.0170967 |
| Portland | 2949 | 214817 | 0.0137280 |
| Tucson | 22545 | 1828868 | 0.0123273 |
| Raleigh | 7230 | 719797 | 0.0100445 |
| Cincinnati | 3507 | 395676 | 0.0088633 |
| Gainesville | 1319 | 157253 | 0.0083878 |
| Auburn | 182 | 22383 | 0.0081312 |
| Detroit | 5684 | 872947 | 0.0065113 |
| Salt Lake City | 4254 | 793509 | 0.0053610 |
| Little Rock | 215 | 86909 | 0.0024739 |
| Phoenix | 594 | 294292 | 0.0020184 |
gunIncidentsByCityPlot <- readRDS("~outputs/30/31c_gunIncidentsByCityPlot.rds")
gunIncidentsByCityPlot
How many NA observations?
na_coords_summary <- map(guns_list,
~ sum(is.na(.x$lon) | is.na(.x$lat)) /
nrow(.x)) %>%
bind_rows() %>%
gather(key = "City",
value = "pct_NA")
na_coords_summary %>%
arrange(desc(pct_NA)) %>%
kable() %>%
kable_styling()
| City | pct_NA |
|---|---|
| Portland | 0.1254845 |
| Boston | 0.0574702 |
| Kansas City | 0.0491456 |
| Baton Rouge | 0.0464759 |
| Dallas | 0.0391989 |
| Sacramento County | 0.0353341 |
| New York | 0.0317222 |
| Raleigh | 0.0298574 |
| St Louis County | 0.0246932 |
| Lincoln | 0.0238797 |
| Cincinnati | 0.0208155 |
| Auburn | 0.0200000 |
| Nashville | 0.0186494 |
| Little Rock | 0.0186047 |
| Salt Lake City | 0.0179098 |
| Philadelphia | 0.0106018 |
| Gainesville | 0.0068234 |
| Tucson | 0.0066634 |
| Louisville | 0.0054160 |
| Chicago | 0.0050338 |
| Baltimore | 0.0047251 |
| Minneapolis | 0.0025467 |
| Saint Paul | 0.0022080 |
| San Francisco | 0.0018446 |
| Detroit | 0.0016895 |
| Madison | 0.0012930 |
| Indianapolis | 0.0009932 |
| Virginia Beach | 0.0009826 |
| Denver | 0.0008099 |
| Los Angeles | 0.0004548 |
| Atlanta | 0.0000000 |
| Columbia | 0.0000000 |
| Hartford | 0.0000000 |
| Phoenix | 0.0000000 |
CHANGE THIS TO QUADRAT ANALYSIS MAPS
What is Moran’s I?
Inferential statistic ranging from -1 to 1 that describes the level of dispersal/clustering evident in spatial data. Associated with a p-value that provides the statistical significance of the estimate.
Moran’s I range
That assumes a uniform intensity to all values. We need to choose some value for determining an “intensity” for the areas. For now, I went with “gun crimes per 100 people”. Is there another metric that might be better?
Read in the study area geographies w/ crimes per 100, raw crime counts, and population per tract.
Another question is the geographic scope we look at. Some choices. I looked at Census-designated place for now, because those generally align with published city borders.
A concave or convex hull may better fit the data, though.
# source("~scripts/33 - Aggregate crimes and geographies.R")
# tracts_crimeCounts <- readRDS("~outputs/30/33_tracts_crimeCounts.rds")
# BGs_crimeCounts <- readRDS("~outputs/30/33_BGs_crimeCounts.rds")
tmap::tmap_mode("view")
tmap::qtm(tracts_crimeCounts$byCaveHull$`San Francisco`, title = "Concave hull of crimes")
tmap::qtm(tracts_crimeCounts$byVexHull$`San Francisco`, title = "Convex hull of crimes")
tmap::qtm(tracts_crimeCounts$byCounty$`San Francisco`, title = "County")
tmap::qtm(tracts_crimeCounts$byPlace$`San Francisco`, title = "Census-designated place")
# source("~scripts/34 - Calculate Moran's I.R")
tracts_I <- readRDS("~outputs/30/34_tracts_I.rds")
BGs_I <- readRDS("~outputs/30/34_BGs_I.rds")
tracts_pop_I <- readRDS("~outputs/30/34_tracts_pop_I.rds")
BGs_pop_I <- readRDS("~outputs/30/34_BGs_pop_I.rds")
tracts_per100_I <- readRDS("~outputs/30/34_tracts_per100_I.rds")
BGs_per100_I <- readRDS("~outputs/30/34_BGs_per100_I.rds")
# crime Moran's I tract
I_crime_tr <- map_dfr(tracts_I$byPlace,
~ .x$estimate[1],
.id = "City") %>%
rename(crime_I = `Moran I statistic`)
p_crime_tr <- map_dfr(tracts_I$byPlace,
~ data.frame(pval_crime = .x$p.value),
.id = "City") %>%
mutate(geo = "Tract",
pval_crime = ifelse(pval_crime < 0.01, "< 0.01", "> 0.01"))
# crime Moran's I block group
I_crime_BG <- map_dfr(BGs_I$byPlace,
~ .x$estimate[1],
.id = "City") %>%
rename(crime_I = `Moran I statistic`)
p_crime_BG <- map_dfr(BGs_I$byPlace,
~ data.frame(pval_crime = .x$p.value),
.id = "City") %>%
mutate(geo = "Block Group",
pval_crime = ifelse(pval_crime < 0.01, "< 0.01", "> 0.01"))
# pop Moran's I tract
I_pop_tr <- map_dfr(tracts_pop_I$byPlace,
~ .x$estimate[1],
.id = "City") %>%
rename(pop_I = `Moran I statistic`)
p_pop_tr <- map_dfr(tracts_pop_I$byPlace,
~ data.frame(pval_pop = .x$p.value),
.id = "City") %>%
mutate(geo = "Tract",
pval_pop = ifelse(pval_pop < 0.01, "< 0.01", "> 0.01"))
# pop Moran's I block group
I_pop_BG <- map_dfr(BGs_pop_I$byPlace,
~ .x$estimate[1],
.id = "City") %>%
rename(pop_I = `Moran I statistic`)
p_pop_BG <- map_dfr(BGs_pop_I$byPlace,
~ data.frame(pval_pop = .x$p.value),
.id = "City") %>%
mutate(geo = "Block Group",
pval_pop = ifelse(pval_pop < 0.01, "< 0.01", "> 0.01"))
# per100 Moran's I tract
I_per100_tr <- map_dfr(tracts_per100_I$byPlace,
~ .x$estimate[1],
.id = "City") %>%
rename(per100_I = `Moran I statistic`)
p_per100_tr <- map_dfr(tracts_per100_I$byPlace,
~ data.frame(pval_per100 = .x$p.value),
.id = "City") %>%
mutate(geo = "Tract",
pval_per100 = ifelse(pval_per100 < 0.01, "< 0.01", "> 0.01"))
# per100 Moran's I block group
I_per100_BG <- map_dfr(BGs_per100_I$byPlace,
~ .x$estimate[1],
.id = "City") %>%
rename(per100_I = `Moran I statistic`)
p_per100_BG <- map_dfr(BGs_per100_I$byPlace,
~ data.frame(pval_per100 = .x$p.value),
.id = "City") %>%
mutate(geo = "Block Group",
pval_per100 = ifelse(pval_per100 < 0.01, "< 0.01", "> 0.01"))
I_crime_tmp <- left_join(I_crime_tr, p_crime_tr,
by = "City") %>%
rbind(left_join(I_crime_BG, p_crime_BG,
by = "City"))
I_pop_tmp <- left_join(I_pop_tr, p_pop_tr,
by = "City") %>%
rbind(left_join(I_pop_BG, p_pop_BG,
by = "City"))
I_per100_tmp <- left_join(I_per100_tr, p_per100_tr,
by = "City") %>%
rbind(left_join(I_per100_BG, p_per100_BG,
by = "City"))
I_tmp <- left_join(I_crime_tmp, I_pop_tmp, by = c("City", "geo")) %>%
left_join(I_per100_tmp, by = c("City", "geo"))
I_wide <- I_tmp %>%
pivot_wider(names_from = "geo",
values_from = c("crime_I", "pop_I", "pval_crime", "pval_pop", "per100_I", "pval_per100")) %>%
dplyr::select(City,
per100_tr = per100_I_Tract,
per100_tr_p = pval_per100_Tract,
crime_tr = crime_I_Tract,
crime_tr_p = pval_crime_Tract,
pop_tr = pop_I_Tract,
pop_tr_p = pval_pop_Tract,
per100_BG = `per100_I_Block Group`,
per100_BG_p = `pval_per100_Block Group`,
crime_BG = `crime_I_Block Group`,
crime_BG_p = `pval_crime_Block Group`,
pop_BG = `pop_I_Block Group`,
pop_BG_p = `pval_pop_Block Group`)
Below is a table showing Moran’s I for crimes per 100 for census tracts and block groups for each study area selected by the relevant Census-designated place, raw crime counts, and population. We see a big range from 0 (essentially random gun crime distribution relative to population) to very high
Questions:
I_wide %>%
mutate(crime_tr_p = cell_spec(crime_tr_p,
"html",
background = ifelse(str_detect(crime_tr_p, ">"),
"red",
"white")),
pop_tr_p = cell_spec(pop_tr_p,
"html",
background = ifelse(str_detect(pop_tr_p, ">"),
"red",
"white")),
per100_tr_p = cell_spec(per100_tr_p,
"html",
background = ifelse(str_detect(per100_tr_p, ">"),
"red",
"white")),
crime_BG_p = cell_spec(crime_BG_p,
"html",
background = ifelse(str_detect(crime_BG_p, ">"),
"red",
"white")),
pop_BG_p = cell_spec(pop_BG_p,
"html",
background = ifelse(str_detect(pop_BG_p, ">"),
"red",
"white")),
per100_BG_p = cell_spec(per100_BG_p,
"html",
background = ifelse(str_detect(per100_BG_p, ">"),
"red",
"white"))) %>%
arrange(desc(per100_tr)) %>%
kable(format = "html",
escape = FALSE,
digits = 2,
caption = "Moran's I for Crimes per 100 people, Crime Count, and Population by Tract and Block Group") %>%
kable_styling(bootstrap_options = "striped")
| City | per100_tr | per100_tr_p | crime_tr | crime_tr_p | pop_tr | pop_tr_p | per100_BG | per100_BG_p | crime_BG | crime_BG_p | pop_BG | pop_BG_p |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Hartford | 0.74 | < 0.01 | 0.68 | < 0.01 | 0.16 | < 0.01 | 0.75 | < 0.01 | 0.71 | < 0.01 | 0.12 | < 0.01 |
| Chicago | 0.74 | < 0.01 | 0.59 | < 0.01 | 0.31 | < 0.01 | 0.69 | < 0.01 | 0.56 | < 0.01 | 0.22 | < 0.01 |
| Indianapolis | 0.71 | < 0.01 | 0.56 | < 0.01 | 0.37 | < 0.01 | 0.68 | < 0.01 | 0.50 | < 0.01 | 0.35 | < 0.01 |
| Louisville | 0.70 | < 0.01 | 0.56 | < 0.01 | 0.23 | < 0.01 | 0.62 | < 0.01 | 0.55 | < 0.01 | 0.21 | < 0.01 |
| Boston | 0.69 | < 0.01 | 0.61 | < 0.01 | 0.12 | < 0.01 | 0.50 | < 0.01 | 0.52 | < 0.01 | 0.05 | < 0.01 |
| Philadelphia | 0.63 | < 0.01 | 0.64 | < 0.01 | 0.23 | < 0.01 | 0.51 | < 0.01 | 0.47 | < 0.01 | 0.16 | < 0.01 |
| Saint Paul | 0.61 | < 0.01 | 0.48 | < 0.01 | 0.18 | < 0.01 | 0.60 | < 0.01 | 0.53 | < 0.01 | 0.18 | < 0.01 |
| Raleigh | 0.59 | < 0.01 | 0.63 | < 0.01 | 0.36 | < 0.01 | 0.58 | < 0.01 | 0.53 | < 0.01 | 0.27 | < 0.01 |
| Tucson | 0.58 | < 0.01 | 0.55 | < 0.01 | 0.05 | > 0.01 | 0.36 | < 0.01 | 0.41 | < 0.01 | 0.29 | < 0.01 |
| Little Rock | 0.58 | < 0.01 | 0.52 | < 0.01 | 0.13 | > 0.01 | 0.39 | < 0.01 | 0.39 | < 0.01 | 0.15 | < 0.01 |
| Denver | 0.57 | < 0.01 | 0.54 | < 0.01 | 0.17 | < 0.01 | 0.60 | < 0.01 | 0.52 | < 0.01 | 0.11 | < 0.01 |
| Sacramento County | 0.56 | < 0.01 | 0.57 | < 0.01 | 0.13 | < 0.01 | 0.58 | < 0.01 | 0.60 | < 0.01 | 0.17 | < 0.01 |
| Lincoln | 0.56 | < 0.01 | 0.51 | < 0.01 | -0.01 | > 0.01 | 0.51 | < 0.01 | 0.40 | < 0.01 | 0.37 | < 0.01 |
| Dallas | 0.56 | < 0.01 | 0.52 | < 0.01 | 0.30 | < 0.01 | 0.37 | < 0.01 | 0.36 | < 0.01 | 0.26 | < 0.01 |
| Baltimore | 0.56 | < 0.01 | 0.39 | < 0.01 | 0.23 | < 0.01 | 0.40 | < 0.01 | 0.39 | < 0.01 | 0.19 | < 0.01 |
| Baton Rouge | 0.55 | < 0.01 | 0.38 | < 0.01 | 0.27 | < 0.01 | 0.52 | < 0.01 | 0.50 | < 0.01 | 0.24 | < 0.01 |
| Nashville | 0.55 | < 0.01 | 0.49 | < 0.01 | 0.27 | < 0.01 | 0.30 | < 0.01 | 0.36 | < 0.01 | 0.38 | < 0.01 |
| St Louis County | 0.53 | < 0.01 | 0.49 | < 0.01 | 0.19 | < 0.01 | 0.54 | < 0.01 | 0.55 | < 0.01 | 0.27 | < 0.01 |
| Minneapolis | 0.53 | < 0.01 | 0.46 | < 0.01 | 0.12 | < 0.01 | 0.61 | < 0.01 | 0.61 | < 0.01 | 0.21 | < 0.01 |
| San Francisco | 0.50 | < 0.01 | 0.51 | < 0.01 | 0.14 | < 0.01 | 0.48 | < 0.01 | 0.66 | < 0.01 | 0.05 | < 0.01 |
| Atlanta | 0.48 | < 0.01 | 0.41 | < 0.01 | 0.26 | < 0.01 | 0.44 | < 0.01 | 0.40 | < 0.01 | 0.18 | < 0.01 |
| Columbia | 0.47 | < 0.01 | 0.32 | < 0.01 | 0.19 | < 0.01 | 0.37 | < 0.01 | 0.30 | < 0.01 | 0.25 | < 0.01 |
| Salt Lake City | 0.47 | < 0.01 | 0.13 | < 0.01 | 0.21 | < 0.01 | 0.46 | < 0.01 | 0.07 | < 0.01 | 0.34 | < 0.01 |
| Los Angeles | 0.43 | < 0.01 | 0.46 | < 0.01 | 0.11 | < 0.01 | 0.36 | < 0.01 | 0.39 | < 0.01 | 0.17 | < 0.01 |
| Detroit | 0.43 | < 0.01 | 0.41 | < 0.01 | 0.33 | < 0.01 | 0.27 | < 0.01 | 0.35 | < 0.01 | 0.26 | < 0.01 |
| Portland | 0.40 | < 0.01 | 0.32 | < 0.01 | 0.26 | < 0.01 | 0.35 | < 0.01 | 0.43 | < 0.01 | 0.33 | < 0.01 |
| Kansas City | 0.39 | < 0.01 | 0.45 | < 0.01 | 0.40 | < 0.01 | 0.38 | < 0.01 | 0.29 | < 0.01 | 0.41 | < 0.01 |
| Madison | 0.37 | < 0.01 | 0.26 | < 0.01 | 0.27 | < 0.01 | 0.36 | < 0.01 | 0.29 | < 0.01 | 0.14 | < 0.01 |
| Auburn | 0.33 | < 0.01 | 0.23 | < 0.01 | -0.25 | > 0.01 | 0.38 | < 0.01 | 0.38 | < 0.01 | 0.00 | > 0.01 |
| New York | 0.33 | < 0.01 | 0.46 | < 0.01 | 0.32 | < 0.01 | 0.35 | < 0.01 | 0.44 | < 0.01 | 0.15 | < 0.01 |
| Phoenix | 0.32 | < 0.01 | 0.31 | < 0.01 | 0.17 | < 0.01 | 0.19 | < 0.01 | 0.20 | < 0.01 | 0.26 | < 0.01 |
| Virginia Beach | 0.30 | < 0.01 | 0.28 | < 0.01 | 0.18 | < 0.01 | 0.18 | < 0.01 | 0.23 | < 0.01 | 0.15 | < 0.01 |
| Gainesville | 0.30 | < 0.01 | 0.29 | < 0.01 | 0.02 | > 0.01 | 0.35 | < 0.01 | 0.28 | < 0.01 | 0.01 | > 0.01 |
| Cincinnati | 0.27 | < 0.01 | 0.16 | < 0.01 | 0.27 | < 0.01 | 0.38 | < 0.01 | 0.14 | < 0.01 | 0.13 | < 0.01 |
The plots below show the Moran’s I (extent of spatial clustering), count of gun crimes, percentage of all crimes that are gun crimes, and Siegel Score for every city by year. The x-axis is aligned on each, so you can read straight down, but some of the cities with fewer years of data will look compressed horizontally.
A note on outliers: I noticed last time that several cities (NYC, LA, SF, others) had Moran’s Is of around 0, and this persisted even when looking at them year-by-year, despite visual evidence in the maps showing clear clustering (see map of San Francisco above, for example). This was due to outliers in the gun crimes / 100 stat (a few gun crimes in an area with low population lead to extremely high values). So, I filtered out the block groups every year that had values higher than the 99.5th percentile in each city. In LA, this had the effect of changing the Moran’s I from 0 to around 0.55, a much more sensible value. The 99.5th percentile cut-off is arbitrary. Also, those block groups were filtered out rather than imputed, so they would be empty on a map.
Boston: Total gun crimes and clustering dropped over the last decade.
Chicago: clustering is steady over time, while total gun crimes have decreased. Siegel score has steadily risen since 1991.
Denver: Gun crimes and clustering seem to have increased. Crime data available from 2014.
Detroit: Decrease in gun crimes and clustering. Steady Siegel Score.
Hartford: Clustering has dipped, while both annual gun crimes and Siegel Score have risen.
Indianapolis: Gradual decline in clustering. Sudden drop in gun crimes (reporting change?). Little change to Siegel Score.
Los Angeles: Rising Siegel Score, little change to overall crimes or clustering.
Louisville: Sharp increase in crimes, little change to clustering and Siegel.
Minneapolis: Decrease in clustering, increase in crimes and Siegel Score.
Nashville: Decrease in clustering, increase in crimes.
New York: Decrease in crimes and increase in Siegel Score. Not change to clustering.
Philly: Decrease in crimes, maybe a slight increase in clustering, little change to Siegel Score.
Portland: Limited sample, but all three seem to have risen recently.
San Francisco: Increase in Siegel Score, but not much change to total crimes or clustering.
St. Louis County: Decrease in Siegel Score and increase in crimes.
Tucson: Decrease in crimes and clustering.
These maps show high- and low-crime clusters (crimes per 100), as well as areas with high- or low-crime compared to their neighbors. Areas in white not shown to have a statistically significant relationship with their neighbors.
Notes:
I calculated this for every year as well, so we can see how clusters move, but not sure how to visualize it. A gif, maybe?
Next step is to compare demographics across the categories, or at least the hotspots vs. the whole city.
Intuitively, these maps make sense. At least for the cities I know, the hot spot locations are not surprising.
Generally, there are far more hotspots than there are cold spots. Makes sense, a long right tail for gun crimes per capita.
Some edge effects, it seems, particularly with the “low crime” areas. See Baltimore and Chicago for example. For spatial weights calculation, weights are standardized over all links to the block group, so block groups on the edge naturally have fewer neighbors that are weighed more highly.
Very few “outliers”, where the block group is very different from their neighbor.
BGs_per100_localI_census <- readRDS("~outputs/30/34_BGs_per100_localI_census.rds")
BGs_per100_localI_census_tmp <- map(
BGs_per100_localI_census,
~ .x %>%
st_drop_geometry() %>%
mutate(hotspot = ifelse(str_detect(cluster, "high"),
"Yes",
"No"),
majorityMinority_tmp = ifelse(majorityMinority == "Yes",
1,
0)) %>%
group_by(hotspot) %>%
summarize(Hotspots = n(),
MdHHInc_weighted = round(weighted.mean(MdHHInc, TotPop, na.rm = TRUE), 0),
Age_weighted = round(weighted.mean(MdAge, TotPop, na.rm = TRUE), 1),
White_pct_weighted = round(weighted.mean(White_pct, TotPop, na.rm = TRUE), 2),
Black_pct_weighted = round(weighted.mean(Black_pct, TotPop, na.rm = TRUE), 2),
TotPop = sum(TotPop, na.rm = TRUE),
.groups = "drop") %>%
arrange(desc(hotspot)))
num_rows <- map(1:length(names(BGs_per100_localI_census_tmp)),
~ nrow(BGs_per100_localI_census_tmp[[.x]])) %>%
unlist()
BGs_per100_localI_census_tmp %>%
bind_rows() %>%
# arrange(Species) %>% # same order as table results
# select(-Species) %>%
kable("html",
caption = "Weighted averages by Moran's I hotspots",
# align = "",
col.names = c("Hotspot?",
"# block groups",
"Med. HH Inc.",
"Med. Age",
"White %",
"Black %",
"Total Pop.")
) %>%
kable_styling(full_width = F,
fixed_thead = T) %>%
group_rows(index = setNames(num_rows, names(BGs_per100_localI_census_tmp)),
label_row_css = "background-color: #666; color: #fff;")
| Hotspot? | # block groups | Med. HH Inc. | Med. Age | White % | Black % | Total Pop. |
|---|---|---|---|---|---|---|
| Atlanta | ||||||
| Yes | 51 | 39698 | 35.2 | 0.19 | 0.76 | 55122 |
| No | 281 | 71100 | 35.5 | 0.43 | 0.49 | 456191 |
| Auburn | ||||||
| Yes | 6 | 48459 | 34.1 | 0.55 | 0.05 | 8082 |
| No | 47 | 80273 | 36.4 | 0.67 | 0.05 | 81083 |
| Baltimore | ||||||
| Yes | 135 | 77322 | 36.4 | 0.43 | 0.47 | 120267 |
| No | 518 | 52502 | 36.8 | 0.27 | 0.66 | 494433 |
| Baton Rouge | ||||||
| Yes | 33 | 37681 | 34.6 | 0.17 | 0.81 | 31524 |
| No | 188 | 57240 | 34.1 | 0.45 | 0.47 | 265448 |
| Boston | ||||||
| Yes | 93 | 41300 | 33.2 | 0.18 | 0.58 | 104270 |
| No | 467 | 78750 | 34.0 | 0.59 | 0.19 | 575143 |
| Chicago | ||||||
| Yes | 379 | 31197 | 35.6 | 0.08 | 0.86 | 332310 |
| No | 1809 | 66069 | 35.5 | 0.55 | 0.22 | 2401481 |
| Cincinnati | ||||||
| Yes | 37 | 46270 | 31.5 | 0.37 | 0.59 | 30777 |
| No | 260 | 47289 | 35.5 | 0.54 | 0.39 | 294029 |
| Columbia | ||||||
| Yes | 15 | 23326 | 32.4 | 0.06 | 0.90 | 16100 |
| No | 146 | 53314 | 33.6 | 0.50 | 0.42 | 207465 |
| Dallas | ||||||
| Yes | 99 | 43285 | 34.5 | 0.43 | 0.50 | 112063 |
| No | 873 | 63087 | 33.9 | 0.64 | 0.22 | 1325099 |
| Denver | ||||||
| Yes | 83 | 54732 | 31.9 | 0.72 | 0.13 | 117094 |
| No | 398 | 76546 | 36.2 | 0.77 | 0.09 | 576323 |
| Detroit | ||||||
| Yes | 81 | 43447 | 36.1 | 0.20 | 0.70 | 49983 |
| No | 798 | 32207 | 35.4 | 0.14 | 0.79 | 627172 |
| Gainesville | ||||||
| Yes | 8 | 30648 | 33.5 | 0.46 | 0.48 | 9963 |
| No | 95 | 44416 | 32.6 | 0.66 | 0.22 | 157091 |
| Hartford | ||||||
| Yes | 24 | 40970 | 34.7 | 0.30 | 0.45 | 28185 |
| No | 72 | 37683 | 32.4 | 0.34 | 0.35 | 95443 |
| Indianapolis | ||||||
| Yes | 161 | 60320 | 37.2 | 0.65 | 0.27 | 196949 |
| No | 436 | 50930 | 35.1 | 0.61 | 0.29 | 692428 |
| Lincoln | ||||||
| Yes | 41 | 66177 | 35.5 | 0.84 | 0.06 | 66895 |
| No | 150 | 65339 | 35.0 | 0.86 | 0.04 | 227681 |
| Little Rock | ||||||
| Yes | 16 | 31456 | 36.9 | 0.11 | 0.86 | 11367 |
| No | 142 | 62694 | 37.7 | 0.54 | 0.38 | 195075 |
| Los Angeles | ||||||
| Yes | 230 | 42359 | 34.1 | 0.36 | 0.24 | 321423 |
| No | 2284 | 68040 | 36.4 | 0.54 | 0.08 | 3644059 |
| Louisville | ||||||
| Yes | 47 | 41741 | 34.9 | 0.47 | 0.48 | 50009 |
| No | 223 | 46990 | 37.9 | 0.66 | 0.27 | 239247 |
| Madison | ||||||
| Yes | 26 | 72068 | 35.3 | 0.78 | 0.06 | 44842 |
| No | 154 | 67889 | 34.4 | 0.78 | 0.07 | 249411 |
| Minneapolis | ||||||
| Yes | 50 | 41905 | 29.0 | 0.29 | 0.44 | 55655 |
| No | 328 | 69843 | 34.1 | 0.69 | 0.16 | 360366 |
| Nashville | ||||||
| Yes | 34 | 45325 | 34.8 | 0.38 | 0.57 | 39657 |
| No | 429 | 64586 | 35.8 | 0.65 | 0.26 | 633184 |
| New York | ||||||
| Yes | 664 | 38283 | 33.5 | 0.18 | 0.44 | 808941 |
| No | 5780 | 72515 | 37.8 | 0.45 | 0.22 | 7634772 |
| NA | 3 | NaN | NaN | NaN | NaN | 0 |
| Philadelphia | ||||||
| Yes | 328 | 48246 | 38.2 | 0.48 | 0.37 | 375273 |
| No | 1008 | 49113 | 34.8 | 0.39 | 0.44 | 1200249 |
| Phoenix | ||||||
| Yes | 61 | 35395 | 30.9 | 0.65 | 0.10 | 86118 |
| No | 916 | 63668 | 34.6 | 0.73 | 0.07 | 1561429 |
| Portland | ||||||
| Yes | 43 | 44274 | 37.1 | 0.66 | 0.08 | 71657 |
| No | 427 | 78454 | 38.4 | 0.79 | 0.05 | 634927 |
| Raleigh | ||||||
| Yes | 36 | 46167 | 33.9 | 0.38 | 0.50 | 59040 |
| No | 208 | 73996 | 35.6 | 0.62 | 0.25 | 467287 |
| Sacramento County | ||||||
| Yes | 128 | 43046 | 33.6 | 0.53 | 0.13 | 219082 |
| No | 784 | 75332 | 37.5 | 0.59 | 0.09 | 1290941 |
| Saint Paul | ||||||
| Yes | 75 | 64446 | 31.8 | 0.56 | 0.18 | 84414 |
| No | 175 | 56281 | 32.8 | 0.57 | 0.15 | 218346 |
| Salt Lake City | ||||||
| Yes | 18 | 40054 | 33.8 | 0.65 | 0.04 | 27342 |
| No | 133 | 70099 | 33.0 | 0.74 | 0.02 | 185408 |
| San Francisco | ||||||
| Yes | 49 | 57596 | 38.4 | 0.32 | 0.17 | 75151 |
| No | 530 | 117715 | 39.5 | 0.48 | 0.04 | 794893 |
| NA | 1 | 59063 | 26.2 | 0.39 | 0.20 | 3064 |
| St Louis County | ||||||
| Yes | 58 | 40575 | 35.2 | 0.18 | 0.78 | 74701 |
| No | 634 | 82581 | 41.5 | 0.72 | 0.20 | 923983 |
| Tucson | ||||||
| Yes | 77 | 60834 | 38.7 | 0.75 | 0.04 | 134939 |
| No | 331 | 45404 | 35.7 | 0.73 | 0.05 | 470850 |
| Virginia Beach | ||||||
| Yes | 22 | 53524 | 31.2 | 0.44 | 0.38 | 29167 |
| No | 280 | 82502 | 38.1 | 0.68 | 0.18 | 420968 |